/*	This program creates a working dataset for main analysis sample centered
at the month of 50th birthday, for ages 45 to 57.*/


***** Set directories
* Only dir_clean is referenced in the part of this do-file shown here;
* dir_raw and dir_output are kept as defined.
local dir_raw 		"~/Dropbox/Retirement gaming/raw"
local dir_clean 	"~/Dropbox/Retirement gaming/clean"
local dir_output 	"~/Dropbox/Retirement gaming/output/dataverse"

* Input dataset (read from dir_clean) and output dataset (written to dir_clean)
local indataname "mainsample_age50.dta" 
local savedataname "mainsample_age50analysis.dta" 


********************************************
**	PREPARE DATA 
********************************************
quietly {
use "`dir_clean'/`indataname'", clear

* Running counter within person (i) x self_empl, ordered by t.
* t is given in parentheses so that bysort itself sorts on it: the ordering
* that defines order==1 ("first observed") is then guaranteed by this one
* statement instead of depending on an earlier sort being left intact.
bysort i self_empl (t): gen order = _n

* Mean earnings before age 50 (age_centered<0), computed within each
* sample_self_empl group. The 99th percentile of that mean among
* sample_self_empl==1 is stored in global topawpre; no observations are
* dropped in this section.
* NOTE(review): the mean is taken by sample_self_empl, not by person i --
* confirm this grouping is intended.
capture drop aux
generate aux = W if age_centered < 0
bysort sample_self_empl: egen aWpre50 = mean(aux)
drop aux
summarize aWpre50 if sample_self_empl == 1, detail
global topawpre = r(p99)

* Earnings expressed relative to the pre-50 mean
generate W_rel = W / aWpre50

* Earnings relative to age 49, falling back to age 48, then 47, then the
* first observation when the earlier reference is missing
capture drop aux

* Person-level mean earnings at age_centered -1, -2 and -3 -> W49, W48, W47
forvalues back = 1/3 {
	local refage = 50 - `back'
	generate aux = W if age_centered == -`back'
	bysort i: egen W`refage' = mean(aux)
	drop aux
}

* Person-level mean earnings over the observations flagged order==1
generate aux = W if order == 1
bysort i: egen Wfirst = mean(aux)
drop aux
summarize W49 W48 Wfirst

* Start from the age-49 reference and fill in step by step
generate W_rel49 = W / W49
summarize W_rel49
replace W_rel49 = W / W48 if W49 == . & W48 != .
summarize W_rel49
replace W_rel49 = W / W47 if W49 == . & W48 == . & W47 != .
summarize W_rel49
replace W_rel49 = W / Wfirst if W_rel49 == .
summarize W_rel49

summarize W W_rel49 W_rel
summarize W_rel49 if age_cent == -1


* Lowest and highest age observed for each person
bysort i: egen minage = min(age)
bysort i: egen maxage = max(age)
summarize age minage maxage

* One-digit industry group from ciiu2. Codes outside the listed ranges, and
* missing ciiu2, leave ciiu1 missing.
generate ciiu1 = 1 if ciiu2 < 10                                        // Agriculture and mining
replace ciiu1 = 2 if inrange(ciiu2, 10, 33) | ciiu2 == 95               // Manufacturing
replace ciiu1 = 3 if inrange(ciiu2, 35, 39)                             // Energy and waste disposal
replace ciiu1 = 4 if inrange(ciiu2, 41, 43)                             // Construction
replace ciiu1 = 5 if inrange(ciiu2, 45, 47) | inrange(ciiu2, 55, 56)    // Wholesale and retail, restaurants, hotels
replace ciiu1 = 6 if inrange(ciiu2, 49, 53) | ciiu2 == 61               // Transport and communications
replace ciiu1 = 7 if inrange(ciiu2, 62, 82) | ciiu2 == 96               // Services
replace ciiu1 = 8 if inrange(ciiu2, 84, 94) | inrange(ciiu2, 58, 60) | ciiu2 == 97 // Public admin, social and domestic services

* ciiu2 at the observation(s) flagged order==1, spread to all rows of the
* person; 0 when ciiu2 is missing there
capture drop aux
generate aux = cond(order == 1 & ciiu2 != ., ciiu2, 0)
bysort i: egen ciiu2_1stobs = max(aux)
drop aux
label variable ciiu2_1stobs "Industry when 1st obs"


// Sector dummies using the classification of bc_rama in ECH;
// each is missing whenever ciiu2 is missing
generate manufacturing     = ciiu1 == 2 if ciiu2 != .
generate retailhospitality = ciiu1 == 5 if ciiu2 != .
generate transportenergy   = inlist(ciiu1, 3, 6) if ciiu2 != .
generate services          = inlist(ciiu1, 4, 7, 8) if ciiu2 != .  // includes construction not in construction pension system

label variable manufacturing     "Manufacturing"
label variable retailhospitality "Retail, Restaurants, Hotels"
label variable transportenergy   "Transport, Communications, Energy"
label variable services          "Services, Other"

* Single categorical sector variable: 1..4 in the order of the dummies above
generate sector = .
local code = 0
foreach dummy in manufacturing retailhospitality transportenergy services {
	local ++code
	replace sector = `code' if `dummy' == 1
}

* Job start month from the daily date Fing, and tenure as t minus that month
* (presumably t is a monthly date as well -- TODO confirm)
generate jobstart = mofd(Fing)
format jobstart %tm
generate tenure = t - jobstart
summarize tenure if self_empl == 1, detail
summarize tenure if empl == 1, detail
* 1 when tenure is at least 12, 0 otherwise; missing when tenure is missing
generate tenure_1yrs = (tenure >= 12 * 1) if tenure != .

********************************************************************************
**** PREPARE DATA FOR EVENT STUDIES
********************************************************************************

* Constant equal to 1 on every row
capture drop auxc
generate auxc = 1

* Age-window indicators built from age_centered
generate post5053 = age_centered >= 0 & age_centered < 4     // 0 to below 4
generate post54   = age_centered >= 4                        // 4 and above
generate pre48    = age_centered <= -2                       // -2 and below
generate post50   = age_centered >= 0                        // 0 and above
generate pre4648  = age_centered > -5 & age_centered <= -2   // above -5 up to -2 (to drop two dummies)

label variable post5053 "Age 50-53"
label variable post54   "Age$\geq$54"
label variable pre48    "Age$\leq$48"
label variable post50   "Age$\geq$50"
label variable pre4648  "Age 46-48"

* Age trend (agemonths_centered divided by 12) and its interactions with the
* post-50 and post-54 shifts
generate aget = agemonths_centered / 12
generate aget_post50 = aget * post50
generate aget_post54 = aget * post54

label variable aget        "Age trend"
label variable aget_post50 "Age$\geq$50 x Age trend"


* Reports MCB (ficto)
* Earnings and the ficto amount are compared after rounding both to 0.01.
g Wround=round(W,.01)
g rfictoround=round(rficto,.01)
sum Wround W rfictoround rficto

* Stata treats a missing value as larger than any number and as equal to
* another missing value. Without the missing() guard, a self-employed row
* with missing W would be flagged as reporting above the ficto amount, and a
* row with both values missing would be flagged as reporting exactly the
* ficto amount. With the guard such rows are coded 0.

* Self-employed rows only (missing elsewhere): earnings equal the ficto amount
g reports_ficto= Wround==rfictoround & !missing(Wround, rfictoround) if self_empl==1
sum reports_ficto if self_empl==1

* Earnings above the ficto amount; set to 0 for rows that are not self-employed
g reports_overficto= Wround>rfictoround & !missing(Wround, rfictoround) if self_empl==1
replace reports_overficto=0 if self_empl!=1

** NEW RR1 FIRM SIZE CATEGORIES **
* Shift the existing ndep_cat codes up by one so that 0 can stand for firms
* with no employees (ndep==0).
replace ndep_cat=ndep_cat+1
replace ndep_cat=0 if ndep==0
* The replace option lets the definition succeed even if a value label named
* ndep_cat is already in memory.
label define ndep_cat 0 "no employees" 1 "1-4" 2 "5-9" 3 "10-19" 4 "20-49" 5 "50-249" 6 "250 plus", replace
label values ndep_cat ndep_cat

*Dummies for descriptives
* All five evaluate to 0 when ndep_cat is missing.
g noempl	=ndep_cat==0
g micro		=ndep_cat==1 
g micro2	=ndep_cat==2
g small		=ndep_cat<=2
g larger	=ndep_cat>=3 & ndep_cat<.

label var noempl "No employees"
label var micro "Firm size $<5$ workers"
label var micro2 "Firm size 5-9 workers"
* small covers categories 0-2 (no employees, 1-4, 5-9), i.e. fewer than 10
* workers, the complement of larger. The label previously read "10-49".
label var small	"Firm size $<10$ workers"
label var larger "Firm size $\geq$10 workers"

* Reported earnings as a ratio to rficto
generate Wficto = W / rficto
label variable Wficto "Earnings/Self-emp. min."

* Descriptive labels for variables already present in the data
label variable year            "Year"
label variable cohort          "Birth cohort"
label variable age             "Age"
label variable prop_mempl      "Prop. time employed"
label variable prop_mself_empl "Prop. time self employed"
label variable W               "Reported earnings (1,000 UYP)"

* COHORT GROUPS (TRANSITION REGIME)
* oldsystem: birth_month before 1936m4
* transition: birth_month from 1936m4 up to, but excluding, 1956m4
generate oldsystem  = birth_month < tm(1936m4)
generate transition = birth_month >= tm(1936m4) & birth_month < tm(1956m4)

* Interact every age term with the transition indicator
local ageterms aget post5053 post54 pre48 post50 aget_post50
foreach term of local ageterms {
	generate `term'_transition = `term' * transition
}

label variable transition             "Transition"
label variable post5053_transition    "Transition x Age 50-53"
label variable post54_transition      "Transition x Age$\geq$54"
label variable pre48_transition       "Transition x Age$\leq$48"
label variable post50_transition      "Transition x Age$\geq$50"
label variable aget_transition        "Transition x Age trend"
label variable aget_post50_transition "Transition x Age$\geq$50 x Age tr."

* HAS EMPLOYEES WHEN FIRST OBSERVED
* Row-level flag: 1 on a self-employed order==1 row with a positive,
* non-missing ndep, 0 on every other row; the person-level maximum is then
* spread to all rows of the person.
capture drop aux
generate aux = ndep > 0 & ndep < . & order == 1 & self_empl == 1
bysort i: egen withempl = max(aux)
drop aux

local ageterms aget post5053 post54 pre48 post50 aget_post50
foreach term of local ageterms {
	generate `term'_withempl = `term' * withempl
}

label variable withempl             "Employer"
label variable post5053_withempl    "Employer x Age 50-53"
label variable post54_withempl      "Employer x Age$\geq$54"
label variable pre48_withempl       "Employer x Age$\leq$48"
label variable post50_withempl      "Employer x Age$\geq$50"
label variable aget_withempl        "Employer x Age trend"
label variable aget_post50_withempl "Employer x Age$\geq$50 x Age tr."

* MCB WHEN FIRST OBSERVED
* Row-level flag: 1 on a self-employed order==1 row with reports_ficto==1,
* 0 on every other row; the person-level maximum goes to all rows.
capture drop aux
generate aux = reports_ficto == 1 & order == 1 & self_empl == 1
bysort i: egen ficto_1stobs = max(aux)
drop aux
label variable ficto_1stobs "Reported min 1st obs"

local ageterms post5053 post54 pre48 aget post50 aget_post50
foreach term of local ageterms {
	generate `term'_ficto = `term' * ficto_1stobs
}
label variable post5053_ficto    "Reported min x Age 50-53"
label variable post54_ficto      "Reported min x Age$\geq$54"
label variable pre48_ficto       "Reported min x Age$\leq$48"
label variable post50_ficto      "Reported min x Age$\geq$50"
label variable aget_ficto        "Reported min x Age trend"
label variable aget_post50_ficto "Reported min x Age$\geq$50 x Age trend"

* REPORTED EARNINGS WHEN FIRST OBSERVED
* W on order==1 rows (0 where W is missing there, and 0 on all other rows),
* then the person-level maximum
capture drop aux
generate aux = cond(order == 1 & W != ., W, 0)
bysort i: egen W_1stobs = max(aux)
drop aux
label variable W_1stobs "Reported earnings 1st obs"


* DID INTERACTIONS: each age term multiplied by the self_empl indicator
local ageterms post5053 post54 pre48 aget post50 aget_post50
foreach term of local ageterms {
	generate `term'_self_empl = `term' * self_empl
}
label variable post5053_self_empl    "Self-employed x Age 50-53"
label variable post54_self_empl      "Self-employed x Age$\geq$54"
label variable pre48_self_empl       "Self-employed x Age$\leq$48"
label variable post50_self_empl      "Self-employed x Age$\geq$50"
label variable aget_self_empl        "Self-employed x Age trend"
label variable aget_post50_self_empl "Self-employed x Age$\geq$50 x Age trend"

* Same interaction for the 46-48 window
generate pre4648_self_empl = pre4648 * self_empl
label variable pre4648_self_empl "Self-employed x Age 46-48"

* LARGE FIRM AT FIRST OBSERVATION (EMPLOYEES)
* Row-level flag: 1 on an employee (empl==1) order==1 row with larger==1,
* 0 on every other row; the person-level maximum goes to all rows.
capture drop aux
generate aux = larger == 1 & order == 1 & empl == 1
bysort i: egen large_1stobs = max(aux)
drop aux
label variable large_1stobs "Large firm"

local ageterms aget post5053 post54 pre48 post50 aget_post50
foreach term of local ageterms {
	generate `term'_larger = `term' * large_1stobs
}
label variable post5053_larger    "Large firm x Age 50-53"
label variable post54_larger      "Large firm x Age$\geq$54"
label variable pre48_larger       "Large firm x Age$\leq$48"
label variable post50_larger      "Large firm x Age$\geq$50"
label variable aget_larger        "Large firm x Age trend"
label variable aget_post50_larger "Large firm x Age$\geq$50 x Age trend"

* SMALL FIRM AT FIRST OBSERVATION (EMPLOYEES)
* Defined as the complement of large_1stobs
generate small_1stobs = 1 - large_1stobs
foreach term of local ageterms {
	generate `term'_small = `term' * small_1stobs
}
label variable post5053_small    "Small firm x Age 50-53"
label variable post54_small      "Small firm x Age$\geq$54"
label variable pre48_small       "Small firm x Age$\leq$48"
label variable post50_small      "Small firm x Age$\geq$50"
label variable aget_small        "Small firm x Age trend"
label variable aget_post50_small "Small firm x Age$\geq$50 x Age trend"

generate pre4648_small = pre4648 * small_1stobs
label variable pre4648_small "Small firm x Age 46-48"

* Person-level indicator: has at least one row with age_centered==-1
generate aux = age_centered == -1
bysort i: egen obs49 = max(aux)
drop aux


** RR1 NEW VERSION: FIRM SIZE WHEN FIRST OBSERVED **
* For the size category (ndep_cat) and the raw count (ndep): take the value
* on order==1 rows and spread the person-level minimum to all rows.
foreach sizevar in ndep_cat ndep {
	generate aux = `sizevar' if order == 1
	bysort i: egen `sizevar'_1stobs = min(aux)
	label variable `sizevar'_1stobs "Firm size when 1st observed"
	drop aux
}
* The category version reuses the ndep_cat value label
label values ndep_cat_1stobs ndep_cat


* TENURE OF 1+ YEARS WHEN FIRST OBSERVED
* Person-level maximum of a flag that is 1 on order==1 rows with tenure_1yrs==1
capture drop aux
generate aux = (tenure_1yrs == 1) & (order == 1)
bysort i: egen tenure1yrs_1stobs = max(aux)
drop aux
label variable tenure1yrs_1stobs "Tenure 1+ yrs"

local ageterms aget post5053 post54 pre48 post50 aget_post50
foreach term of local ageterms {
	generate `term'_tenure1yrs = `term' * tenure1yrs_1stobs
}
label variable post5053_tenure1yrs    "Tenure 1+ yrs x Age 50-53"
label variable post54_tenure1yrs      "Tenure 1+ yrs x Age$\geq$54"
label variable pre48_tenure1yrs       "Tenure 1+ yrs x Age$\leq$48"
label variable post50_tenure1yrs      "Tenure 1+ yrs x Age$\geq$50"
label variable aget_tenure1yrs        "Tenure 1+ yrs x Age trend"
label variable aget_post50_tenure1yrs "Tenure 1+ yrs x Age$\geq$50 x Age trend"

* TENURE OF 1+ YEARS AT AGE 49 (OR WHEN FIRST OBSERVED)
* Same flag taken at age_centered==-1; people with obs49==0 get the
* first-observation value instead
generate aux = (tenure_1yrs == 1) & (age_centered == -1)
bysort i: egen tenure1yrs_at49 = max(aux)
replace tenure1yrs_at49 = tenure1yrs_1stobs if obs49 == 0
drop aux
label variable tenure1yrs_at49 "Tenure 1+ yrs"


*STAYERS IN FIRM
* Firm identifier j on employee (empl==1) rows flagged order==1, spread to
* all rows of the person as the minimum
cap drop aux
g aux=j if order==1 & empl==1
bysort i: egen firmid_1stobs=min(aux) 
drop aux
* Firm identifier on employee rows at age_centered==-1; people with obs49==0
* fall back to the first-observation firm
g aux=j if empl==1 & age_centered==-1
bysort i: egen firmid_at49=min(aux)
* Drop the scratch variable here: no later statement removes it, so it would
* otherwise be written into the saved analysis dataset.
drop aux
replace firmid_at49=firmid_1stobs if obs49==0
* 1 on employee rows whose firm equals the reference firm, 0 otherwise
g match_firm49 = empl==1 & j==firmid_at49

sum firmid_at49 match_firm49 if empl==1

* 1 when any of trem_2_max ... trem_5_max equals 1
generate othpay = inlist(1, trem_2_max, trem_3_max, trem_4_max, trem_5_max)

* Set monthly hours below the 5th percentile to missing
summarize hrsmonth, detail
replace hrsmonth = . if hrsmonth < r(p5)

* Earnings per hour, defined only where hours are positive
generate wagephr = W / hrsmonth if hrsmonth > 0

* 1 = self-employed, 2 = employee with small_1stobs==1,
* 3 = employee with small_1stobs==0 (later assignments override earlier ones)
generate group = 1 if self_empl == 1
replace group = 2 if empl == 1 & small_1stobs == 1
replace group = 3 if empl == 1 & small_1stobs == 0


* Weekly work hours: monthly hours divided by 4.3, rounded to an integer
generate hrsweek = round(hrsmonth / 4.3)

* Write the analysis dataset to dir_clean
save "`dir_clean'/`savedataname'", replace

}

* Clear memory and stop the do-file
clear all
exit
